package com.shujia.spark.sql

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

/**
  * Word-count job written to be packaged into a jar and launched with `spark-submit`.
  *
  * No master is configured in code, so it has to be supplied by the launcher, e.g.:
  * {{{
  * spark-submit --class com.shujia.spark.sql.Demo5Submit --master yarn --deploy-mode client spark-examples_2.11-2.4.5.jar
  * }}}
  *
  * Input is read from `/data/words` and the result is written to `/data/wc`
  * (both paths are resolved by the cluster's default filesystem; the original
  * note says these are HDFS paths).
  */
object Demo5Submit {

  /**
    * Entry point: reads the lines, counts every comma-separated word and
    * overwrites the output directory with tab-separated `word` / `c` rows.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession
      .builder()
      .appName("submit")
      .getOrCreate()

    // Implicit conversions, needed for the $"column" syntax
    import spark.implicits._
    // All built-in Spark SQL functions (split, explode, count, ...)
    import org.apache.spark.sql.functions._

    try {
      // Read the input as tab-separated CSV exposing a single string column `lines`
      val linesDF: DataFrame = spark
        .read
        .format("csv")
        .option("sep", "\t")
        .schema("lines String")
        .load("/data/words")

      // Compute the number of occurrences of each word
      val wordCount: DataFrame = linesDF
        // Split each line on "," and emit one row per word
        .select(explode(split($"lines", ",")) as "word")
        .groupBy($"word")
        .agg(count($"word") as "c")

      // Save the result, replacing whatever a previous run left behind
      wordCount
        .write
        .format("csv")
        .option("sep", "\t")
        .mode(SaveMode.Overwrite)
        .save("/data/wc")
    } finally {
      // Always release the session's resources, even when the job fails
      spark.stop()
    }
  }
}
